# Color palette
sppcol <- c("#2EC4B6", "#FF6666", "#5D2E8C")
agecol <- c("#5D2E8C", "#2EC4B6", "#CCFF66", "#F1E8B8")
sppcol.t <- adjustcolor(sppcol, alpha.f = 0.4)

#setwd("/Users/Kadeem Gilbert/Desktop/KRP data 2019")
setwd("C:\\Users\\wengn\\Dropbox\\twn idiwn\\Post doc\\Publications\\Gilbert et al\\Data\\Data")

##########################
#      SURVEY DATA       #
# EXPLORATION & CLEAN UP #
##########################

nepdata <- read.csv("KRPdata2_2019.csv",header=T,na.strings="?") 

# define this function at end of script
pairs.cor(nepdata[c("CaCO3", "pH", "Prey", "Length", "Width", "Volume", "Plant.Length", "Internode.Length", "TotalPitchers.by.Nodes", "Pitcher.Rank")])

# Pitcher morphometric data are all right skewed
nepdata$logLength <- log(nepdata$Length)
nepdata$logWidth <- log(nepdata$Width)
nepdata$logVolume <- log(nepdata$Volume + 0.05)

# looks better now
pairs.cor(nepdata[c("CaCO3", "pH", "Prey", "logLength", "logWidth", "logVolume", "Plant.Length", "Internode.Length", "TotalPitchers.by.Nodes", "Pitcher.Rank")])

# there are only 2 missing values here. 
nepdata[which(is.na(nepdata$logLength)), c("logLength", "logWidth", "logVolume")]
# we can guess the value of logLength for row 144 easily so we don't lose a data point unnecessarily (row 17 is a lost cause)
nepdata$logLength[144] <- predict(lm(logLength ~ logVolume + logWidth, data=nepdata), newdata = nepdata[144,])

# extract PC1 of Pitcher morphometrics to obtain a pitcher size PC (but exclude row 17)
size.PCA <- prcomp(nepdata[-17, c("logLength", "logWidth", "logVolume")], scale = T, center = T)
plot(size.PCA$x[,1:2], pch = ifelse(nepdata$Morph == "Lower", 16, 17),
  col = as.numeric(as.factor(nepdata$Species[-17])))
biplot(size.PCA)
# take PC1 (but negative, so that it still represents pitcher size)
nepdata$Size.PC1 <- append(-size.PCA$x[,1], NA, after = 16)
pairs.cor(nepdata[c("CaCO3", "pH", "Prey", "Size.PC1", "Plant.Length", "Internode.Length", "TotalPitchers.by.Nodes", "Pitcher.Rank")])

# Look at distributions of pH and CaCO3 for modelling
par(mfrow = c(1,2)); hist(nepdata$pH); hist(nepdata$CaCO3)
# pH is ok actually, just not well distributed. CaCO3 can be fixed by logging
hist(nepdata$logCaCO3 <- log(nepdata$CaCO3 + 12.5))

##############
# SURVEY: pH #
##############

library(lme4)
library(MuMIn)
library(lsmeans)

all.dat <- nepdata[-which(is.na(nepdata$Size.PC1) | is.na(nepdata$pH)),]
nrow(all.dat)

### all species
summary(ph.all.full <- lmer(pH ~ Species*(Condition + Morph + Prey + Size.PC1) + (1|Site) + (1|PlantID), 
  data = all.dat, na.action = na.fail, REML = F)) 
ph.all.dredged <- dredge(ph.all.full, m.lim = c(0,6), extra = "R^2")
# It is obvious that Condition*Species are the best predictors
# but among the top two models (with dAICc < 2) Prey or Prey*Species seem also to be important predictors
ph.all.dredged[ph.all.dredged$delta<2,]

# this is the top model
summary(pH.best <- get.models(ph.all.dredged, subset=1)[[1]])

lsmeans(pH.best, pairwise~Species*Prey)

par(mar=c(10,2,2,2))
bp1 <- boxplot(pH ~ Species*Condition, data = all.dat, las = 3, xlab="", col=c("yellow", "turquoise", "pink"),
  border = c("orange", "darkgreen", "red"), xaxt = "n")

se <- function(x) sd(x)/sqrt(length(x))
ph.se <- with(all.dat, tapply(pH, list(Species, Condition), se))
ph.mean <- with(all.dat, tapply(pH, list(Species, Condition), mean))
ph.n <- with(all.dat, tapply(pH, list(Species, Condition), length))
ph.se[is.na(ph.se)] <- 0

ph.se <- ph.se[,c("Unopened", "Living", "Senescing", "Dead")]
ph.mean <- ph.mean[,c("Unopened", "Living", "Senescing", "Dead")]
ph.n <- ph.n[,c("Unopened", "Living", "Senescing", "Dead")]
ph.upp <- ph.mean + ph.se
ph.low <- ph.mean - ph.se

newprey <- seq(-0.5,3.5,len=100)
grapred <- predict(get.models(ph.all.dredged, subset=1)[[1]], re.form=NA,
  newdata=data.frame(Prey=newprey, Species="gracilis", Condition="Living"))

library(bootpredictlme4)
grapred <- predict(get.models(ph.all.dredged, subset = 1)[[1]], 
  newdata = data.frame(Prey=newprey, Species="gracilis", Condition="Living"), 
  re.form = NA, se.fit = TRUE, nsim = 100)
amppred <- predict(get.models(ph.all.dredged, subset = 1)[[1]], 
  newdata = data.frame(Prey=newprey, Species="ampullaria", Condition="Living"), 
  re.form = NA, se.fit = TRUE, nsim = 100)
rafpred <- predict(get.models(ph.all.dredged, subset = 1)[[1]], 
  newdata = data.frame(Prey=newprey, Species="rafflesiana", Condition="Living"), 
  re.form = NA, se.fit = TRUE, nsim = 100)


# pH FIG 
#pdf("pH Fig.pdf", height = 6, width = 12)
par(mar=c(5.5,5,2,2), mfrow = c(1,2))
bp1 <- barplot(ph.mean, beside = T, density = c(20, 10, 40), angle = 45, col = sppcol,
  ylim = c(1, 8), xpd = F, ylab = "Fluid pH")
box()
for(i in 1:length(c(bp1))) arrows(c(bp1)[i], c(ph.upp)[i], c(bp1)[i], c(ph.low)[i], code = 3, angle = 90, length = 0.1)
text(c(ph.upp)+0.2 ~ c(bp1), labels = c(ph.n))
legend('topright', bty  = "n", fill = sppcol, density = c(20, 10, 40), title = "Species", 
  legend = c("N. ampullaria", "N. gracilis", "N. rafflesiana"))
mtext(side = 3, adj = 0, text = " a)", line = -1)

par(mar=c(5.5,2,2,5))
plot(jitter(pH) ~ jitter(Prey), data = all.dat, type = "n",
  xlab = "Prey density", ylab = "Fluid pH", xaxt = "n", ylim = c(1,8))
axis(side = 1, at = seq(0,3,1))
polygon(c(grapred$ci.fit[1,], rev(grapred$ci.fit[2,])) ~ c(newprey, rev(newprey)), border = F, col=sppcol.t[2])
polygon(c(rafpred$ci.fit[1,], rev(rafpred$ci.fit[2,])) ~ c(newprey, rev(newprey)), border = F, col=sppcol.t[3])
lines(rafpred$fit ~ newprey, lwd=5, col="white", lty = 2)
lines(grapred$fit ~ newprey, lwd=5, col="white")
polygon(c(amppred$ci.fit[1,], rev(amppred$ci.fit[2,])) ~ c(newprey, rev(newprey)), border = F, col=sppcol.t[1])
lines(amppred$fit ~ newprey, lwd=5, col="white", lty = 3)
ptcols <- ifelse(all.dat$Species == "rafflesiana", sppcol[3], ifelse(all.dat$Species == "gracilis", sppcol[2], sppcol[1]))
points(jitter(pH) ~ jitter(Prey), data = all.dat, pch = 21,
  col = ifelse(all.dat$Condition=="Living", "white", ptcols), cex=2,
  bg = ifelse(all.dat$Condition=="Living", ptcols, "white"),
  xlab = "Prey density", ylab = "Fluid pH", xaxt = "n", ylim = c(1,8))
legend('topright', bty  = "n", col = sppcol, pch = 16, title = "Species", pt.cex = 2,
  legend = c("N. ampullaria", "N. gracilis", "N. rafflesiana"))
mtext(side = 3, adj = 0, text = " b)", line = -1)
dev.off()




###############
# SURVEY: TDS #
###############

all.dat.Ca <- nepdata[-which(is.na(nepdata$Size.PC1) | is.na(nepdata$logCaCO3)),]
nrow(all.dat.Ca)

summary(Ca.all.full <- lmer(logCaCO3 ~ Species*(Condition + Morph + Prey + Size.PC1) + (1|Site) + (1|PlantID), 
  data = all.dat.Ca, na.action = na.fail, REML = F)) 
Ca.all.dredged <- dredge(Ca.all.full, m.lim = c(0,6), extra = "R^2", rank = "AIC")
head(Ca.all.dredged)
summary(Ca.best <- get.models(Ca.all.dredged, subset=1)[[1]])
0.
col.t <- adjustcolor(sppcol, alpha.f=0.5)

boxplot(logCaCO3 ~ Species*Condition, data = all.dat.Ca, las = 3, xlab="", col=c("orange", "turquoise", "pink"))

longidat <- data.frame(
  day = c(0,1,2,4,5,6,8,9),
  hardness = c(220,150, 170, 120, 120, 100, 70, 70),
  pH = c(7, 5, 4.5, 3, NA, NA, NA, 2)
)

# dissolved minerals in gra and raff are similar:
# high in living and unopened pitchers, low in senescing and dead ones
# amp deviates from this a bit. living ptichers have low dissolved minerals still,\# while unopened ones have unusally high dissolved minerals (though sample size small)

se <- function(x) sd(x)/sqrt(length(x))
tds.mean <- with(all.dat.Ca, tapply(CaCO3, list(Species, Condition), mean))
tds.se <- with(all.dat.Ca, tapply(CaCO3, list(Species, Condition), se))
tds.n <- with(all.dat.Ca, tapply(CaCO3, list(Species, Condition), length))
tds.se[is.na(tds.se)] <- 0
tds.se <- tds.se[,c("Unopened", "Living", "Senescing", "Dead")]
tds.mean <- tds.mean[,c("Unopened", "Living", "Senescing", "Dead")]
tds.n <- tds.n[,c("Unopened", "Living", "Senescing", "Dead")]

tds.upp <- tds.mean + tds.se
tds.low <- tds.mean - tds.se

rafsizes <- seq(-0.5,4,0.1)
gnasizes <- seq(-3,1,0.1)
amppred <- predict(Ca.best, newdata = data.frame(Condition = "Living", Species = "ampullaria", Size.PC1 = gnasizes), 
  re.form = NA, se.fit = T, nsim = 100)
grapred <- predict(Ca.best, newdata = data.frame(Condition = "Living", Species = "gracilis", Size.PC1 = gnasizes), 
  re.form = NA, se.fit = T, nsim = 100)
rafpred <- predict(Ca.best, newdata = data.frame(Condition = "Living", Species = "rafflesiana", Size.PC1 = rafsizes), 
  re.form = NA, se.fit = T, nsim = 100)

#pdf("TDS Fig (v2).pdf", height = 12, width = 5)
par(mar=c(5,3,1,5), mfrow = c(3,1), oma = c(0,2,0,0))
bp1 <- barplot(tds.mean, beside = T, density = c(20, 10, 40), angle = 45, col = sppcol,
  ylim = c(0,450), xpd = F, ylab = "")
box()
for(i in 1:length(c(bp1))) arrows(c(bp1)[i], c(tds.upp)[i], c(bp1)[i], c(tds.low)[i], code = 3, angle = 90, length = 0.1)
text(c(tds.upp)+10 ~ c(bp1), labels = c(tds.n))
legend('topright', bty  = "n", fill = sppcol, density = c(20, 10, 40), title = "Species", 
  legend = c("N. ampullaria", "N. gracilis", "N. rafflesiana"))
mtext(side = 3, adj = 0, text = " a)", line = -1.5)

plot(CaCO3 ~ Size.PC1, data = all.dat.Ca,  pch = 21, type = "n",
  ylab = "", xlab = "Pitcher size (PC1)")
polygon(exp(c(rafpred$ci.fit[1,], rev(rafpred$ci.fit[2,]))) ~ c(rafsizes, rev(rafsizes)), border = F, col=sppcol.t[3])
lines(exp(rafpred$fit) ~ rafsizes, lwd=5, col="white", lty = 2)
polygon(exp(c(grapred$ci.fit[1,], rev(grapred$ci.fit[2,]))) ~ c(gnasizes, rev(gnasizes)), border = F, col=sppcol.t[2])
lines(exp(grapred$fit) ~ gnasizes, lwd=5, col="white", lty = 1)
polygon(exp(c(amppred$ci.fit[1,], rev(amppred$ci.fit[2,]))) ~ c(gnasizes, rev(gnasizes)), border = F, col=sppcol.t[1])
lines(exp(amppred$fit) ~ gnasizes, lwd=5, col="white", lty = 3)
points(CaCO3 ~ Size.PC1, data = all.dat.Ca,  pch = 21, cex = 2, col = "white",
  bg = ifelse(all.dat.Ca$Species == "rafflesiana", col.t[3], ifelse(all.dat.Ca$Species == "gracilis", col.t[2], col.t[1])))
mtext(side = 3, adj = 0, text = " b)", line = -1.5)
legend('topright', bty  = "n", col = col.t, pch = 16, pt.cex = 2, title = "Species", 
  legend = c("N. ampullaria", "N. gracilis", "N. rafflesiana"))

plot(hardness ~ day, data = longidat, type = "b", pch = 16, col = sppcol[3], cex = 2, ylim = c(50, 250),
  xlab = "Time since opening (days)")
points(pH*25 ~ day, data = longidat, subset = !is.na(pH), type = "b", pch = 1, cex = 2, 
  col = sppcol.t[3], lty = 2)
legend('topright', bty = "n", pch=c(16,1), legend = c("Total hardness", "pH"), col = c(sppcol[3], sppcol.t[3]), lty = c(1,2))
mtext(side = 3, adj = 0, text = " c)", line = -1.5)
axis(side = 4, at = seq(50, 250, 50), labels = seq(50, 250, 50)/25)
mtext(side = 4, adj = 0.5, text = "pH", line = 2.5)
mtext(side = 2, adj = 0.5, text = "Total hardness (CaCO3 equivalent [mg/L])", outer = T, line = -0.5)
dev.off()

#################
# PROTEIN (AGE) #
#################

#Load first protein experiment (intraspecific study of RAF)
#(excluding zero timepoint where protein is negligible)
prot_expt1<-read.table("ProteinExpt1.short.txt",sep="\t",head=F,row.names=1)
prot_expt1.t<-t(prot_expt1)
prot_expt1.df<-as.data.frame(prot_expt1.t)
prot_expt1.df 
prot1 <- read.csv("ProteinExpt2.short.txt",sep="\t",head=F,row.names=1)

# pH
pH_expt1 <- data.frame(t(read.table("ProteinExpt1_pH.txt", sep="\t",head=F,row.names=1)))
pH_expt1[pH_expt1=="?"] <- NA
pH_expt1 <- rbind(pH_expt1, 
  rep(NA, 11),
  rep(NA, 11),
  rep(NA, 11),
  rep(NA, 11))
pH_expt1$Timepoint <- c(0, 1, 2, 23, 26, 48, 24*3, 24*4, 24*5, 24*6, 24*7)

# reformatting the data for analysis
library(reshape2)
prot_expt1.df$Timepoint <- c(0, 1, 2, 23, 26, 48, 24*3, 24*4, 24*5, 24*6, 24*7)
prot1.melted <- melt(prot_expt1.df, id.vars = 1, measure.vars = 2:10, value.name = "Protein", variable.name = "PitcherID", na.rm = T)
prot1.melted$Age <- substr(prot1.melted$PitcherID, 2, 2)

pH.melted <- melt(pH_expt1, id.vars = 1, measure.vars = 2:10, value.name = "pH", variable.name = "PitcherID", na.rm = F)
combined.melted <- merge(prot1.melted, pH.melted, by.x = c("Timepoint", "PitcherID"), by.y = c("Timepoint", "PitcherID"))
combined.melted$pH <- as.numeric(combined.melted$pH)

# protein concentrations need to be transformed, square root transformation is sufficient
hist(sqrt(combined.melted$Protein))
hist(combined.melted$pH)

# use gamm from mgcv package
library(mgcv)
combined.melted$Age <- factor(combined.melted$Age)
# we can model this in three different ways:
# protein hydrolysis with time follow patterns/trends that are unique to each species
P1.gamm2 <- gamm(sqrt(Protein) ~ s(log(Timepoint+1), by = Age, k = 4) + Age, random = list(PitcherID=~1), data = combined.melted)
# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between all species
P1.gamm1 <- gamm(sqrt(Protein) ~ s(log(Timepoint+1), k = 4) + Age, random = list(PitcherID=~1), data = combined.melted)
# protein hydrolysis with time follow the same pattern/trend regardless of species identity
P1.gamm0 <- gamm(sqrt(Protein) ~ s(log(Timepoint+1), k = 4), random = list(PitcherID=~1), data = combined.melted)

# diagnostic plots
plot(P1.gamm2$lm)
plot(P1.gamm1$lm)
plot(P1.gamm0$lm)

# comparing smooths between the models
plot(P1.gamm2$gam)
plot(P1.gamm1$gam)
plot(P1.gamm0$gam)

P.tab <- AICc(P1.gamm2$lm, P1.gamm1$lm, P1.gamm0$lm)
model.sel(list(P1.gamm2$lm, P1.gamm1$lm, P1.gamm0$lm), rank = "AIC")
summary(P1.gamm2$gam)

# unfortunately, the best model does not include the term age
# ie. protein concentration trends in dead, senescing, living and unopened pitchers are the same

# and the trend looks something like this
plot(P1.gamm0$gam)


#####################
# PROTEIN (SPECIES) #
#####################

#Load 2nd protein experiment (interspecific study, uncooked albumen)
prot_expt2<-read.table("ProteinExpt2_short_v2.txt",sep="\t",head=T,na.strings="?")
prot_expt2 <- prot_expt2[1:15,]
prot_expt2$Size <- apply(apply(apply(prot_expt2[c("Width", "Length")], 2, log), 2, scale), 1, mean)
prot_expt2$Species <- substr(prot_expt2$Pitcher, 0, 3)
head(prot_expt2)
names(prot_expt2)[2:6] <- as.character(0:4)

# lack of variable combinations
with(prot_expt2, table(Species, Condition)) # almost unusable
with(prot_expt2, table(Species, Inquilines))
with(prot_expt2, table(Species, Morph)) # unusable
with(prot_expt2, table(Species, pH))

# reformating the data for analysis
library(reshape2)
prot2.melted <- melt(prot_expt2, id.vars = 1, measure.vars = 2:6, value.name = "Protein", variable.name = "time", na.rm = T)
prot2.melted <- merge(prot2.melted, prot_expt2[c("Pitcher", "Species", "Condition", "Morph", "Size", "Inquilines", "Prey", "pH")], by = "Pitcher")
prot2.melted$time <- as.numeric(as.character(prot2.melted$time))
prot2.melted$Protein <- as.numeric(prot2.melted$Protein)
summary(prot2.melted)

# protein concentrations need to be transformed, square root transformation is sufficient
hist(prot2.melted$Protein <- sqrt(prot2.melted$Protein))

# use gamm from mgcv
library(mgcv)
prot2.melted$Species <- factor(prot2.melted$Species)
# we can model this in three different ways:
# protein hydrolysis with time follow patterns/trends that are unique to each species
P2.spxt <- gamm(sqrt(Protein) ~ s(time, k=3, by = Species) + Species, random = list(Pitcher=~1), data = prot2.melted)
P2.ispxt <- gamm(sqrt(Protein) ~ s(time, k=3, by = Species) + Species + Inquilines, random = list(Pitcher=~1), data = prot2.melted)
P2.pspxt <- gamm(sqrt(Protein) ~ s(time, k=3, by = Species) + Species + pH, random = list(Pitcher=~1), data = prot2.melted)
P2.fspxt <- gamm(sqrt(Protein) ~ s(time, k=3, by = Species) + Species + Prey, random = list(Pitcher=~1), data = prot2.melted)
# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between all species
P2.spt <- gamm(sqrt(Protein) ~ s(time, k=3) + Species, random = list(Pitcher=~1), data = prot2.melted)
P2.ispt <- gamm(sqrt(Protein) ~ s(time, k=3) + Species + Inquilines, random = list(Pitcher=~1), data = prot2.melted)
P2.pspt <- gamm(sqrt(Protein) ~ s(time, k=3) + Species + pH, random = list(Pitcher=~1), data = prot2.melted)
P2.fspt <- gamm(sqrt(Protein) ~ s(time, k=3) + Species + Prey, random = list(Pitcher=~1), data = prot2.melted)

# null model
P2.t <- gamm(sqrt(Protein) ~ s(time, k=3), random = list(Pitcher=~1), data = prot2.melted)

# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between Conditions
P2.ct <- gamm(sqrt(Protein) ~ s(time, k=3) + Condition, random = list(Pitcher=~1), data = prot2.melted)
# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between Inquiline conditions
P2.it <- gamm(sqrt(Protein) ~ s(time, k=3) + Inquilines, random = list(Pitcher=~1), data = prot2.melted)
# prey and pH main effect mods
P2.ft <- gamm(sqrt(Protein) ~ s(time, k=3) + Prey, random = list(Pitcher=~1), data = prot2.melted)
P2.pt <- gamm(sqrt(Protein) ~ s(time, k=3) + pH, random = list(Pitcher=~1), data = prot2.melted)

#diagnostic plot
plot(P2.spxt$lm)

IC <- AIC(P2.ct$lm, P2.fspt$lm, P2.fspxt$lm, P2.ft$lm, P2.ispt$lm, P2.ispxt$lm, P2.ixt$lm, 
    P2.pspt$lm, P2.pspxt$lm, P2.pt$lm, P2.spt$lm, P2.spxt$lm, P2.t$lm)
IC[order(IC$AIC),]

summary(P2.pspxt$lm)
summary(P2.fspxt$lm)
plot(P2.pspxt$gam)
plot(P2.spxt$gam)

# plotting and predicting

newTime <- seq(0,170,1)
P1.gam.pred <- predict(P1.gamm0, newdata = data.frame(Timepoint = newTime), se.fit = T)
agecol.t <- adjustcolor(agecol, alpha.f = 0.5)

# Combined plot
#pdf("Protein degradation.pdf", width = 10, height = 5)
par(mfrow = c(1,2), mar = c(5, 5.5, 2, 0))
plot(Protein ~ Timepoint, data = combined.melted, type = "n", xlab = "",
  ylab = "Protein concentration (mg/dL)", xaxt = "n")
axis(side = 1, at = seq(0,168,24), labels = 0:7)
mtext(side = 3, adj = 0, line = -1, text = " a)")
for(i in unique(combined.melted$PitcherID)){
  lines(Protein ~ Timepoint, data = combined.melted, subset = combined.melted$PitcherID==i,
    col = ifelse(substr(i, 2, 2)=="U", agecol.t[1],
      ifelse(substr(i, 2, 2)=="L", agecol.t[2],
        ifelse(substr(i, 2, 2)=="S", agecol[3], agecol[4]
  ) ) ) )
}

newtime <- seq(0,4,len=100)
polygon(c(P1.gam.pred$fit + P1.gam.pred$se.fit, rev(P1.gam.pred$fit - P1.gam.pred$se.fit))^2 ~
  c(newTime, rev(newTime)), border = F, col = sppcol.t[2])
lines(P1.gam.pred$fit^2 ~ newTime, lwd = 4, col = "white")
legend('topright', title = "Age class", legend = c("Unopened", "Living", "Senescing", "Dead"), lwd = 1, col = agecol, bty = "n")

rafpred <- predict(P2.gamm$gam, newdata = data.frame(
    Timepoint = newtime, Species = "RAF"),
    re.form = NA, se.fit = T)
grapred <- predict(P2.gamm$gam, newdata = data.frame(
    Timepoint = newtime, Species = "GRA"),
    re.form = NA, se.fit = T)
amppred <- predict(P2.gamm$gam, newdata = data.frame(
    Timepoint = seq(0,3,len=100), Species = "AMP"),
    re.form = NA, se.fit = T)

par(mar = c(5, 3.5, 2, 2))
plot(Protein ~ Timepoint, data = prot2.melted, type = "n",
  xlab = "", ylab="", xaxt = "n")
axis(side = 1, at = c(0,1,2,3,4))
prot2.melted$col <- ifelse(prot2.melted$Species=="AMP", sppcol.t[1], ifelse(prot2.melted$Species=="RAF", sppcol.t[3], sppcol.t[2]))
for(i in unique(prot2.melted$PlantID)){
  lines(Protein ~ Timepoint, data = prot2.melted, subset = PlantID == i,
    pch = 16, col = col)
}
mtext(side = 3, adj = 0, line = -1, text = " b)")

polygon(c(rafpred$fit+rafpred$se.fit, rev(rafpred$fit-rafpred$se.fit))^2 ~ 
    c(newtime, rev(newtime)), border = F, col = sppcol.t[2])
polygon(c(grapred$fit+rafpred$se.fit, rev(grapred$fit-rafpred$se.fit))^2 ~ 
    c(newtime, rev(newtime)), border = F, col = sppcol.t[3])
polygon(c(amppred$fit+rafpred$se.fit, rev(amppred$fit-rafpred$se.fit))^2 ~ 
    c(seq(0,3,len=100), rev(seq(0,3,len=100))), border = F, col = sppcol.t[1])
lines(rafpred$fit^2 ~ newtime, col = "white", lwd = 5, lty = 2)
lines(grapred$fit^2 ~ newtime, col = "white", lwd = 5, lty = 1)
lines(amppred$fit^2 ~ seq(0,3,len=100), col = "white", lwd = 5, lty = 3)

legend('topright', lwd = 1, col = sppcol, title = "Species", bty = "n", legend = c("N. ampullaria", "N. gracilis", "N. rafflesiana"))

mtext(side = 1, line = -2, outer = T, text = "Time since albumen addition (days)")
dev.off()

###################################
# pH response to albumen addition #
###################################
combined.melted.nafree <- na.omit(combined.melted)
# we can model this in three different ways:
# protein hydrolysis with time follow patterns/trends that are unique to each species
pH.gamm2 <- gamm(pH ~ s(log(Timepoint+1), by = Age, k = 3) + Age, random = list(PitcherID=~1), data = combined.melted.nafree)
# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between all species
pH.gamm1 <- gamm(pH ~ s(log(Timepoint+1), k = 3) + Age, random = list(PitcherID=~1), data = combined.melted.nafree)
# protein hydrolysis with time follow the same pattern/trend regardless of species identity
pH.gamm0 <- gamm(pH ~ s(log(Timepoint+1), k = 3), random = list(PitcherID=~1), data = combined.melted.nafree)


# diagnostic plots
plot(pH.gamm2$lm)
plot(pH.gamm1$lm)
plot(pH.gamm0$lm)

# comparing smooths between the models
plot(pH.gamm2$gam)
plot(pH.gamm1$gam)
plot(pH.gamm0$gam)

AIC(pH.gamm2$lm, pH.gamm1$lm, pH.gamm0$lm)
# similarly, pH trend is unaffected by age
model.sel(list(pH.gamm2$lm, pH.gamm1$lm, pH.gamm0$lm), rank = "AIC")
summary(pH.gamm2$gam)

newTime <- seq(0,70,1)
pH.gam.pred <- predict(pH.gamm0, newdata = data.frame(Timepoint = newTime), se.fit = T)

#pdf("pH experiment.pdf", width = 6, height = 6)
par(mar = c(5.5,5.5,2,2))
plot(pH ~ Timepoint, data = combined.melted.nafree, type = "n", ylim = c(1, 9), 
  xlab = "Time after addition of albumen (hours)", ylab = "Fluid pH")
polygon(c(pH.gam.pred$fit + pH.gam.pred$se.fit, rev(pH.gam.pred$fit - pH.gam.pred$se.fit)) ~
  c(newTime, rev(newTime)), border = F, col = sppcol.t[2])
lines(pH.gam.pred$fit ~ newTime, lwd = 4, col = "white")
for(i in unique(combined.melted$PitcherID)){
  lines(pH ~ Timepoint, data = combined.melted.nafree, subset = combined.melted.nafree$PitcherID==i,
    col = ifelse(substr(i, 2, 2)=="U", agecol.t[1],
      ifelse(substr(i, 2, 2)=="L", agecol.t[2],
        ifelse(substr(i, 2, 2)=="S", agecol[3], agecol[4]
  ) ) ) )
}
legend('bottomright', title = "Age class", legend = c("Unopened","Living","Senescing","Dead"), col = agecol, lwd = 1, bty = "n")
dev.off()

# protein hydrolysis with time follow patterns/trends that are unique to each species
P1.gamm2 <- gamm(sqrt(Protein) ~ s(Timepoint, by = Age) + Age, random = list(PitcherID=~1), data = combined.melted)
# protein hydrolysis with time follow a pattern/trend that is the similar (same shape but different intercept) between all species
P1.gamm1 <- gamm(sqrt(Protein) ~ s(Timepoint) + Age, random = list(PitcherID=~1), data = combined.melted)
# protein hydrolysis with time follow the same pattern/trend regardless of species identity
P1.gamm0 <- gamm(sqrt(Protein) ~ s(Timepoint), random = list(PitcherID=~1), data = combined.melted)


#######################
#    Monitoring TDS   #
# in a single pitcher #
#######################

# For SI only
ca_time <- read.csv("UnopenedRAFTimeSeries.csv",na.strings="?")

#pdf("TDS with time.pdf", height = 6, width = 6)
plot(ca_time$CaCO3 ~ ca_time$Day, xlab = "Days from pitcher opening", ylab = "CaCO3 equivalent (mg/L)",
  ylim = c(0,250), xlim = c(0,9), lwd = 2.5, lty = 3, pch = 16, cex = 2, type = "b",
  col=sppcol[3], xaxp = c(0,9,9))
dev.off()



####################################
#       ADDITIONAL WORKINGS        #
# SURVEY DATA: PLANT MORPHOMETRICS #
####################################

####### pH Survey ##########

# subset the data (drop ampullaria)
gnr <- nepdata[which(nepdata$Species != "ampullaria"),]
gnr <- gnr[,1:6]
# create a plant-level dataset (since these values are repeated within plant individuals)
gnr.plant <- unique(gnr)
# log-transform the variables to normalize before PCA
gnr.plant$logPlantLength <- log(gnr.plant$Plant.Length + 1)
gnr.plant$logInternode <- log(gnr.plant$Internode.Length)
gnr.plant$logTotalPitchers <- log(gnr.plant$TotalPitchers.by.Nodes)
pairs.cor(gnr.plant[,7:9])
gnr.plant[c("Plant.Length", "Internode.Length", "TotalPitchers.by.Nodes")] <- NULL

# a lot of logTotalPitchers data missing
gnr.plant

# fill in missing logTotalPitchers data by predicting from correlations with the other two vars
missingTP <- which(is.na(gnr.plant$logTotalPitchers))
library(lme4)
library(viridis)

summary(tpm <- lm(logTotalPitchers ~ logPlantLength + logInternode + Species, data = gnr.plant))
gnr.plant$logTotalPitchers[missingTP] <- predict(tpm, newdata = gnr.plant)[missingTP]

# perform plant morphometrics PCA
plantPCA <- prcomp(gnr.plant[c("logTotalPitchers", "logPlantLength", "logInternode" )], scale = T, center = T)
biplot(plantPCA)
# larger, more etiolated plants also have fewer pitchers, 
# but there is also a species effect here (rafflesiana tend to be larger and fewer pitchered)
plot(plantPCA$x[,1:2], pch = 16, cex = 3, col = ifelse(gnr.plant$Species=="rafflesiana", "turquoise", "pink"))

# add this PC1 to original data
nepdata$Plant.PC1 <- plantPCA$x[match(nepdata$PlantID, gnr.plant$PlantID), 1]
summary(nepdata)
#with(nepdata, tapply(Plant.PC1, PlantID, mean))

gnr.dat <- all.dat[-which(is.na(all.dat$Pitcher.Rank)),]
nrow(gnr.dat)

####### TDS Survey ##########

gnr.dat.Ca <- all.dat.Ca[-which(is.na(all.dat.Ca$Pitcher.Rank)),]
nrow(gnr.dat.Ca)

summary(Ca.gnr.full <- lmer(logCaCO3 ~ Species*(Condition + Morph + Prey + Size.PC1 + Pitcher.Rank + Plant.PC1) + (1|Site) + (1|PlantID), 
  data = gnr.dat.Ca, na.action = na.fail, REML = F)) 
Ca.gnr.dredged <- dredge(Ca.gnr.full, m.lim = c(0,6))
head(Ca.gnr.dredged)
summary(get.models(Ca.gnr.dredged, subset=1)[[1]])

par(mar=c(10,5,2,2))
boxplot(logCaCO3 ~ Species*Condition, data = gnr.dat.Ca, las = 3, xlab="", col=c("turquoise", "pink"))
plot(logCaCO3 ~ Size.PC1, data = gnr.dat.Ca,  pch = 16, 
  col = ifelse(gnr.dat.Ca$Species == "rafflesiana", "pink", "turquoise"))





pairs.cor <- function (x,y,smooth=TRUE, digits=2,  ...)
{
  panel.cor <- function(x, y, ...)
  {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r.obj = cor.test(x, y,use="pairwise",...)
    r = as.numeric(r.obj$estimate)
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(txt)
    cex <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex=cex*abs(r))
  }
panel.hist <- function(x)
  {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col="grey")
  }
pairs(x,diag.panel=panel.hist,lower.panel=panel.cor,upper.panel=panel.smooth, ...)
}
